Esta é uma comparação do custo de vida em várias cidades, conforme coletado pelo site popular numbeo . Todos os dados pertencem a eles e foram compartilhados com permissão.A moeda é o Euro. https://www.kaggle.com/datasets/stephenofarrell/cost-of-living
# Imports
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import plotly.offline as pyo
import seaborn as sns
import matplotlib.pyplot as plt
import folium
from folium import Circle
import geopy
import watermark
%reload_ext watermark
%watermark --iversions
# pode haver outras importações do decorrer do notebook
geopy : 2.2.0 numpy : 1.21.5 seaborn : 0.11.2 matplotlib: 3.5.1 watermark : 2.3.1 folium : 0.12.1.post1 plotly : 5.6.0 pandas : 1.4.2
# Configurações de exibição do pandas
pd.options.display.max_columns = 180
pd.options.display.max_rows = 180
# Carregando os dados
df = pd.read_csv('data/cost-of-living.csv')
df.shape
(55, 161)
df.head(3)
| Unnamed: 0 | Saint Petersburg, Russia | Istanbul, Turkey | Izmir, Turkey | Helsinki, Finland | Chisinau, Moldova | Milan, Italy | Cairo, Egypt | Banja Luka, Bosnia And Herzegovina | Baku, Azerbaijan | Guadalajara, Mexico | Kathmandu, Nepal | Hanoi, Vietnam | Ho Chi Minh City, Vietnam | Mexico City, Mexico | Rome, Italy | Monterrey, Mexico | Yekaterinburg, Russia | Sarajevo, Bosnia And Herzegovina | Kharkiv, Ukraine | Kiev, Ukraine | Calgary, Canada | Tunis, Tunisia | Edmonton, Canada | Amsterdam, Netherlands | Belgrade, Serbia | Odessa, Ukraine | Paris, France | Eindhoven, Netherlands | Plovdiv, Bulgaria | Thessaloniki, Greece | Ottawa, Canada | Sofia, Bulgaria | Rotterdam, Netherlands | Varna, Bulgaria | Novi Sad, Serbia | Utrecht, Netherlands | Berlin, Germany | Beirut, Lebanon | Austin, TX, United States | Singapore, Singapore | Toronto, Canada | Auckland, New Zealand | Podgorica, Montenegro | Vancouver, Canada | Tokyo, Japan | Victoria, Canada | Winnipeg, Canada | Boston, MA, United States | Chicago, IL, United States | Almaty, Kazakhstan | Oslo, Norway | Frankfurt, Germany | Bratislava, Slovakia | Dallas, TX, United States | Zagreb, Croatia | Hamburg, Germany | Krakow (Cracow), Poland | Riga, Latvia | Gdansk, Poland | Santiago, Chile | Nairobi, Kenya | Abu Dhabi, United Arab Emirates | Houston, TX, United States | Tbilisi, Georgia | Dubai, United Arab Emirates | Bogota, Colombia | Brno, Czech Republic | Munich, Germany | Poznan, Poland | Las Vegas, NV, United States | London, United Kingdom | Los Angeles, CA, United States | Panama City, Panama | Seoul, South Korea | Warsaw, Poland | Prague, Czech Republic | Wroclaw, Poland | Kuala Lumpur, Malaysia | New York, NY, United States | Copenhagen, Denmark | Ljubljana, Slovenia | Chandigarh, India | Colombo, Sri Lanka | Noida, India | Kaunas, Lithuania | Athens, Greece | Phoenix, AZ, United States | Hong Kong, Hong Kong | Portland, OR, United States | Lisbon, Portugal | Beijing, China | Cape Town, South Africa | Tirana, Albania | Porto, Portugal | Durban, South Africa | Budapest, Hungary | Vilnius, Lithuania | Johannesburg, South Africa | Barcelona, Spain | San Diego, CA, United States | San Francisco, CA, United States | Lima, Peru | Seattle, WA, United States | Brasov, Romania | Bucharest, Romania | Tashkent, Uzbekistan | Ahmedabad, India | Cluj-Napoca, Romania | Madrid, Spain | Tallinn, Estonia | Bangalore, India | Iasi, Romania | Chennai, India | Doha, Qatar | Delhi, India | Gurgaon, India | Valencia, Spain | Vienna, Austria | Hyderabad, India | Montevideo, Uruguay | Tel Aviv-Yafo, Israel | Timisoara, Romania | Taipei, Taiwan | Kolkata, India | Skopje, Macedonia | Shanghai, China | Bangkok, Thailand | Mumbai, India | Reykjavik, Iceland | Amman, Jordan | Pune, India | Stockholm, Sweden | Buenos Aires, Argentina | Minsk, Belarus | San Jose, Costa Rica | Casablanca, Morocco | Lodz, Poland | Montreal, Canada | Sao Paulo, Brazil | Gothenburg, Sweden | Dublin, Ireland | Moscow, Russia | Santo Domingo, Dominican Republic | Adelaide, Australia | Zurich, Switzerland | Yerevan, Armenia | Manila, Philippines | Brisbane, Australia | Jakarta, Indonesia | Ankara, Turkey | Lviv, Ukraine | Novosibirsk, Russia | Bursa, Turkey | Brussels, Belgium | Jerusalem, Israel | Melbourne, Australia | Perth, Australia | Sydney, Australia | Alexandria, Egypt | Quito, Ecuador | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Meal, Inexpensive Restaurant | 7.34 | 4.58 | 3.06 | 12.0 | 4.67 | 15.0 | 3.38 | 3.58 | 5.27 | 5.25 | 1.99 | 1.94 | 1.94 | 4.77 | 15.0 | 5.75 | 5.88 | 3.57 | 4.50 | 5.63 | 13.75 | 1.92 | 13.75 | 15.0 | 5.39 | 5.65 | 15.0 | 15.0 | 5.10 | 10.0 | 11.00 | 6.14 | 13.0 | 6.12 | 4.84 | 15.0 | 8.5 | 8.98 | 13.48 | 8.33 | 13.75 | 11.95 | 5.0 | 11.69 | 8.19 | 12.05 | 10.31 | 13.47 | 13.47 | 4.77 | 18.70 | 10.0 | 6.0 | 13.48 | 6.72 | 11.0 | 5.91 | 7.0 | 5.91 | 6.94 | 4.43 | 6.12 | 13.48 | 4.67 | 7.33 | 3.30 | 5.95 | 12.0 | 5.90 | 13.47 | 17.49 | 13.47 | 8.98 | 6.42 | 5.91 | 5.95 | 5.91 | 2.87 | 17.97 | 17.40 | 8.0 | 2.54 | 1.49 | 3.80 | 6.0 | 10.0 | 10.77 | 5.78 | 12.58 | 8.5 | 3.91 | 8.91 | 4.10 | 7.0 | 7.47 | 5.99 | 7.0 | 8.47 | 11.0 | 13.48 | 15.95 | 3.26 | 13.48 | 6.27 | 6.28 | 3.59 | 1.90 | 5.23 | 12.0 | 8.0 | 2.53 | 5.23 | 1.52 | 6.18 | 3.80 | 3.80 | 8.75 | 10.00 | 1.90 | 8.41 | 15.54 | 5.23 | 3.61 | 2.53 | 4.87 | 4.56 | 1.78 | 3.80 | 17.96 | 6.34 | 3.16 | 11.40 | 5.99 | 8.93 | 6.29 | 4.45 | 4.72 | 10.31 | 6.58 | 10.45 | 15.0 | 8.80 | 5.10 | 11.19 | 23.12 | 4.70 | 3.56 | 12.38 | 2.62 | 3.82 | 3.75 | 5.72 | 3.82 | 15.0 | 15.56 | 10.22 | 12.43 | 11.81 | 2.81 | 3.59 |
| 1 | Meal for 2 People, Mid-range Restaurant, Three... | 29.35 | 15.28 | 12.22 | 65.0 | 20.74 | 60.0 | 17.48 | 22.99 | 23.73 | 23.86 | 11.92 | 15.52 | 17.50 | 23.86 | 55.0 | 23.94 | 29.42 | 20.42 | 18.76 | 22.51 | 42.97 | 12.78 | 48.12 | 65.0 | 24.62 | 20.67 | 50.0 | 56.5 | 20.42 | 35.0 | 55.00 | 25.57 | 50.0 | 20.42 | 17.85 | 65.0 | 40.0 | 53.91 | 44.92 | 39.98 | 55.00 | 50.78 | 25.0 | 55.00 | 40.97 | 49.93 | 41.25 | 62.85 | 53.87 | 23.84 | 80.88 | 50.0 | 30.0 | 44.92 | 33.58 | 50.0 | 23.62 | 40.0 | 23.62 | 34.68 | 26.58 | 48.92 | 53.91 | 18.69 | 48.89 | 19.24 | 25.19 | 60.0 | 23.61 | 53.87 | 64.15 | 58.36 | 35.94 | 31.11 | 28.35 | 29.75 | 25.99 | 16.60 | 76.37 | 80.29 | 35.0 | 10.16 | 14.90 | 12.66 | 30.0 | 40.0 | 53.87 | 46.26 | 44.92 | 35.0 | 26.04 | 35.81 | 24.62 | 30.0 | 25.53 | 29.96 | 35.0 | 31.37 | 40.0 | 53.91 | 71.88 | 21.71 | 62.89 | 20.91 | 27.21 | 15.72 | 8.86 | 23.00 | 40.0 | 45.0 | 12.66 | 20.93 | 7.59 | 49.47 | 15.19 | 12.66 | 35.00 | 45.00 | 8.86 | 31.23 | 64.75 | 20.91 | 24.04 | 10.76 | 16.25 | 26.04 | 23.74 | 15.19 | 109.16 | 38.02 | 12.66 | 66.47 | 20.97 | 27.95 | 42.47 | 20.63 | 23.61 | 44.69 | 28.51 | 75.97 | 60.0 | 36.69 | 34.01 | 40.39 | 92.46 | 22.54 | 17.82 | 49.54 | 16.40 | 15.28 | 18.76 | 22.01 | 11.47 | 60.0 | 62.24 | 49.54 | 56.55 | 54.37 | 14.06 | 31.45 |
| 2 | McMeal at McDonalds (or Equivalent Combo Meal) | 4.40 | 3.82 | 3.06 | 8.0 | 4.15 | 8.0 | 4.51 | 3.58 | 4.22 | 4.25 | 5.56 | 3.88 | 3.87 | 4.77 | 8.0 | 4.31 | 4.12 | 4.08 | 3.75 | 3.75 | 6.87 | 3.19 | 6.53 | 8.0 | 5.09 | 3.67 | 8.7 | 8.0 | 4.59 | 6.5 | 6.87 | 4.60 | 8.0 | 4.08 | 4.42 | 8.0 | 7.0 | 7.19 | 7.19 | 5.33 | 7.56 | 6.57 | 3.0 | 6.87 | 5.70 | 6.89 | 6.80 | 7.18 | 7.18 | 3.81 | 11.12 | 8.0 | 6.0 | 6.51 | 5.37 | 8.0 | 4.72 | 5.0 | 4.25 | 5.20 | 6.20 | 6.24 | 6.74 | 4.05 | 6.11 | 4.40 | 5.16 | 8.0 | 4.72 | 7.18 | 7.00 | 7.18 | 6.06 | 5.06 | 4.72 | 5.55 | 4.72 | 3.32 | 8.09 | 10.04 | 5.0 | 3.30 | 4.22 | 3.16 | 5.0 | 6.0 | 6.73 | 4.05 | 6.74 | 6.0 | 4.56 | 3.74 | 4.76 | 6.0 | 3.74 | 4.79 | 5.0 | 3.45 | 7.0 | 6.74 | 8.98 | 4.21 | 7.19 | 4.18 | 4.19 | 2.70 | 3.16 | 4.18 | 8.0 | 6.0 | 3.16 | 4.19 | 3.35 | 6.18 | 3.16 | 3.16 | 7.00 | 7.25 | 3.80 | 6.96 | 12.95 | 4.18 | 3.91 | 3.79 | 3.25 | 4.56 | 5.04 | 3.80 | 11.64 | 6.34 | 3.16 | 7.60 | 4.94 | 4.49 | 6.29 | 5.16 | 4.25 | 6.87 | 5.92 | 7.60 | 8.0 | 4.40 | 5.10 | 6.84 | 12.94 | 4.60 | 2.67 | 7.43 | 3.28 | 3.51 | 3.56 | 3.67 | 3.06 | 8.2 | 12.97 | 7.12 | 7.32 | 7.15 | 3.38 | 5.39 |
df = df.T
df.rename(columns=df.iloc[0], inplace=True)
df.drop(df.index[0], inplace=True)
df = df.reset_index()
df.rename(columns={'index':'Location'}, inplace=True)
df.head()
| Location | Meal, Inexpensive Restaurant | Meal for 2 People, Mid-range Restaurant, Three-course | McMeal at McDonalds (or Equivalent Combo Meal) | Domestic Beer (0.5 liter draught) | Imported Beer (0.33 liter bottle) | Coke/Pepsi (0.33 liter bottle) | Water (0.33 liter bottle) | Milk (regular), (1 liter) | Loaf of Fresh White Bread (500g) | Eggs (regular) (12) | Local Cheese (1kg) | Water (1.5 liter bottle) | Bottle of Wine (Mid-Range) | Domestic Beer (0.5 liter bottle) | Imported Beer (0.33 liter bottle) | Cigarettes 20 Pack (Marlboro) | One-way Ticket (Local Transport) | Chicken Breasts (Boneless, Skinless), (1kg) | Monthly Pass (Regular Price) | Gasoline (1 liter) | Volkswagen Golf | Apartment (1 bedroom) in City Centre | Apartment (1 bedroom) Outside of Centre | Apartment (3 bedrooms) in City Centre | Apartment (3 bedrooms) Outside of Centre | Basic (Electricity, Heating, Cooling, Water, Garbage) for 85m2 Apartment | 1 min. of Prepaid Mobile Tariff Local (No Discounts or Plans) | Internet (60 Mbps or More, Unlimited Data, Cable/ADSL) | Fitness Club, Monthly Fee for 1 Adult | Tennis Court Rent (1 Hour on Weekend) | Cinema, International Release, 1 Seat | 1 Pair of Jeans (Levis 501 Or Similar) | 1 Summer Dress in a Chain Store (Zara, H&M, ...) | 1 Pair of Nike Running Shoes (Mid-Range) | 1 Pair of Men Leather Business Shoes | Price per Square Meter to Buy Apartment in City Centre | Price per Square Meter to Buy Apartment Outside of Centre | Average Monthly Net Salary (After Tax) | Mortgage Interest Rate in Percentages (%), Yearly, for 20 Years Fixed-Rate | Taxi Start (Normal Tariff) | Taxi 1km (Normal Tariff) | Taxi 1hour Waiting (Normal Tariff) | Apples (1kg) | Oranges (1kg) | Potato (1kg) | Lettuce (1 head) | Cappuccino (regular) | Rice (white), (1kg) | Tomato (1kg) | Banana (1kg) | Onion (1kg) | Beef Round (1kg) (or Equivalent Back Leg Red Meat) | Toyota Corolla 1.6l 97kW Comfort (Or Equivalent New Car) | Preschool (or Kindergarten), Full Day, Private, Monthly for 1 Child | International Primary School, Yearly for 1 Child | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Saint Petersburg, Russia | 7.34 | 29.35 | 4.4 | 2.2 | 2.2 | 0.76 | 0.53 | 0.98 | 0.71 | 1.18 | 7.6 | 0.63 | 5.87 | 0.88 | 1.89 | 2.2 | 0.59 | 3.96 | 42.56 | 0.67 | 19289.39 | 524.45 | 344.27 | 1012.53 | 615.19 | 102.17 | 0.03 | 6.96 | 26.27 | 18.33 | 5.14 | 71.86 | 38.25 | 74.88 | 100.72 | 2476.05 | 1507.7 | 645.94 | 10.29 | 1.47 | 0.26 | 4.4 | 1.29 | 1.25 | 0.56 | 0.86 | 1.96 | 0.92 | 1.91 | 0.89 | 0.48 | 7.18 | 19305.29 | 411.83 | 5388.86 |
| 1 | Istanbul, Turkey | 4.58 | 15.28 | 3.82 | 3.06 | 3.06 | 0.64 | 0.24 | 0.71 | 0.36 | 1.62 | 5.32 | 0.33 | 7.64 | 1.79 | 2.48 | 2.75 | 0.41 | 3.5 | 31.32 | 1.05 | 24443.15 | 291.55 | 177.72 | 545.5 | 314.98 | 59.33 | 0.11 | 14.2 | 25.1 | 15.66 | 3.82 | 36.15 | 25.91 | 61.31 | 50.58 | 1315.18 | 657.05 | 436.0 | 19.08 | 0.76 | 0.47 | 3.59 | 0.85 | 0.86 | 0.59 | 0.61 | 1.84 | 1.3 | 0.8 | 1.91 | 0.62 | 9.73 | 20874.72 | 282.94 | 6905.43 |
| 2 | Izmir, Turkey | 3.06 | 12.22 | 3.06 | 2.29 | 2.75 | 0.61 | 0.22 | 0.65 | 0.38 | 1.51 | 4.97 | 0.29 | 6.11 | 1.63 | 2.09 | 2.29 | 0.46 | 3.05 | 22.92 | 1.04 | 24443.15 | 212.22 | 128.36 | 353.76 | 225.34 | 51.07 | 0.09 | 12.89 | 20.64 | 11.67 | 3.06 | 33.2 | 22.48 | 52.74 | 42.2 | 856.54 | 508.14 | 392.07 | 22.23 | 0.61 | 0.57 | 3.51 | 0.77 | 0.73 | 0.52 | 0.57 | 1.56 | 1.31 | 0.7 | 1.78 | 0.58 | 8.61 | 20898.83 | 212.18 | 4948.41 |
| 3 | Helsinki, Finland | 12.0 | 65.0 | 8.0 | 6.5 | 6.75 | 2.66 | 1.89 | 0.96 | 2.27 | 2.02 | 6.87 | 1.54 | 12.0 | 2.23 | 2.95 | 7.7 | 2.8 | 9.42 | 59.7 | 1.54 | 22000.0 | 986.93 | 758.13 | 1819.32 | 1271.16 | 82.66 | 0.07 | 22.31 | 34.54 | 24.72 | 14.0 | 83.41 | 31.41 | 82.49 | 120.97 | 7444.54 | 4099.36 | 2382.56 | 1.38 | 6.0 | 1.0 | 47.04 | 2.1 | 1.75 | 0.91 | 2.3 | 3.87 | 2.13 | 2.91 | 1.61 | 1.25 | 12.34 | 24402.77 | 351.6 | 1641.0 |
| 4 | Chisinau, Moldova | 4.67 | 20.74 | 4.15 | 1.04 | 1.43 | 0.64 | 0.44 | 0.68 | 0.33 | 1.11 | 5.79 | 0.59 | 3.61 | 0.77 | 1.38 | 1.56 | 0.1 | 3.55 | 9.33 | 0.98 | 16335.12 | 244.49 | 177.96 | 428.67 | 330.84 | 113.46 | 0.07 | 8.58 | 26.64 | 12.1 | 5.19 | 56.54 | 35.37 | 86.25 | 94.38 | 904.82 | 589.13 | 286.95 | 9.42 | 1.3 | 0.18 | 3.11 | 0.7 | 1.22 | 0.56 | 0.84 | 1.25 | 0.93 | 1.56 | 1.37 | 0.59 | 5.37 | 17238.13 | 210.52 | 2679.3 |
# As colunas foram transformadas para objeto. Transformando novamente como numéricas:
df.iloc[:,1:] = df.iloc[:,1:].astype(float)
# Extraindo latitude e longitude
# Criando localizador
locator = geopy.Nominatim(user_agent='myGeocoder')
location = locator.geocode('Saint Petersburg, Russia')
print(f'Latitude:{location.latitude}, Logitude:{location.longitude}')
Latitude:59.917857350000006, Logitude:30.380619357025516
# limitador de tempo para requisição API
from geopy.extra.rate_limiter import RateLimiter
geocode = RateLimiter(locator.geocode, min_delay_seconds=1)
df['location'] = df['Location'].apply(geocode)
df['point'] = df['location'].apply(lambda loc: tuple(loc.point) if loc else None)
df[['latitude','longitude','altitude']] = pd.DataFrame(df['point'].tolist(), index=df.index)
df.drop(columns=['point','location','altitude'], inplace=True)
# Visualizando transformação:
df.iloc[:5,-3:]
| International Primary School, Yearly for 1 Child | latitude | longitude | |
|---|---|---|---|
| 0 | 5388.86 | 59.917857 | 30.380619 |
| 1 | 6905.43 | 41.009198 | 28.966219 |
| 2 | 4948.41 | 38.422455 | 27.131070 |
| 3 | 1641.00 | 60.167488 | 24.942747 |
| 4 | 2679.30 | 47.024512 | 28.832292 |
# Selecionando colunas para criação de atributos com as médias
def food(df):
return int(round((df[['Meal, Inexpensive Restaurant',
'Domestic Beer (0.5 liter draught)',
'Imported Beer (0.33 liter bottle)', 'Coke/Pepsi (0.33 liter bottle)',
'Water (0.33 liter bottle) ', 'Milk (regular), (1 liter)',
'Loaf of Fresh White Bread (500g)', 'Eggs (regular) (12)',
'Local Cheese (1kg)', 'Water (1.5 liter bottle)',
'Bottle of Wine (Mid-Range)', 'Domestic Beer (0.5 liter bottle)',
'Imported Beer (0.33 liter bottle)', 'Cigarettes 20 Pack (Marlboro)',
'Chicken Breasts (Boneless, Skinless), (1kg)', 'Apples (1kg)', 'Oranges (1kg)',
'Potato (1kg)', 'Lettuce (1 head)', 'Cappuccino (regular)',
'Rice (white), (1kg)', 'Tomato (1kg)', 'Banana (1kg)', 'Onion (1kg)',
'Beef Round (1kg) (or Equivalent Back Leg Red Meat)',]].mean()).mean()))
def travel(df):
return int(round((df[['One-way Ticket (Local Transport)',
'Monthly Pass (Regular Price)', 'Gasoline (1 liter)',
'Taxi Start (Normal Tariff)', 'Taxi 1km (Normal Tariff)',
'Taxi 1hour Waiting (Normal Tariff)',]].mean()).mean()))
def living(df):
return int(round((df[[ 'Volkswagen Golf',
'Apartment (1 bedroom) in City Centre',
'Apartment (1 bedroom) Outside of Centre',
'Apartment (3 bedrooms) in City Centre',
'Apartment (3 bedrooms) Outside of Centre',
'Basic (Electricity, Heating, Cooling, Water, Garbage) for 85m2 Apartment',
'Price per Square Meter to Buy Apartment in City Centre',
'Price per Square Meter to Buy Apartment Outside of Centre',
'Toyota Corolla 1.6l 97kW Comfort (Or Equivalent New Car)',]].mean()).mean()))
def lifestyle(df):
return int(round((df[['1 min. of Prepaid Mobile Tariff Local (No Discounts or Plans)',
'Internet (60 Mbps or More, Unlimited Data, Cable/ADSL)',
'Fitness Club, Monthly Fee for 1 Adult',
'Tennis Court Rent (1 Hour on Weekend)',
'Cinema, International Release, 1 Seat',
'1 Pair of Jeans (Levis 501 Or Similar)',
'1 Summer Dress in a Chain Store (Zara, H&M, ...)',
'1 Pair of Nike Running Shoes (Mid-Range)',
'1 Pair of Men Leather Business Shoes',
'Meal for 2 People, Mid-range Restaurant, Three-course',
'McMeal at McDonalds (or Equivalent Combo Meal)',]].mean()).mean()))
def education(df):
return int(round((df[['Preschool (or Kindergarten), Full Day, Private, Monthly for 1 Child',
'International Primary School, Yearly for 1 Child',]].mean()).mean()))
def income(df):
return int(round((df[['Average Monthly Net Salary (After Tax)',
'Mortgage Interest Rate in Percentages (%), Yearly, for 20 Years Fixed-Rate',]].mean()).mean()))
# Colunas com as médias de gastos (segmentado)
df['Food'] = df.apply(food, axis = 1)
df['Travel'] = df.apply(travel, axis = 1)
df['Living'] = df.apply(living, axis = 1)
df['Lifestyle'] = df.apply(lifestyle, axis = 1)
df['Education'] = df.apply(education, axis = 1)
df['Income'] = df.apply(income, axis = 1)
# Criando coluna para países
df['Country'] = df['Location'].str.split(', ')
df['Country'] = df['Country'].apply(lambda x: x[-1])
# Cidades e as médias de custos por classe
from ipywidgets import interact
@interact
def consulta(column = ['Food','Travel','Living','Lifestyle','Education','Income']):
return df[['Country','Food','Travel','Living','Lifestyle','Education','Income']]\
.groupby('Country').agg('mean')\
.sort_values(by=[column], ascending=False)\
.style.background_gradient(cmap='autumn_r')
interactive(children=(Dropdown(description='column', options=('Food', 'Travel', 'Living', 'Lifestyle', 'Educat…
# Gráficos - Top 5 países com maiores custos médios por categoria
from plotly.subplots import make_subplots
fig = make_subplots(rows=3, cols=2, subplot_titles=['Food','Travel','Living','Lifestyle','income','Education'])
fig.update_layout(height=1000, showlegend=False, title='Top 5 paises com custos mais altos')
df1 = df[['Country','Food']].groupby('Country').agg('mean').sort_values(by='Food', ascending=False).reset_index().head(5)
fig.add_bar(x=df1['Country'].head(5), y=df1['Food'], row=1, col=1)
df3 = df[['Country','Travel']].groupby('Country').agg('mean').sort_values(by='Travel', ascending=False).reset_index().head(5)
fig.add_bar(x=df3['Country'].head(5), y=df3['Travel'],row=1, col=2)
df4 = df[['Country','Living']].groupby('Country').agg('mean').sort_values(by='Living', ascending=False).reset_index().head(5)
fig.add_bar(x=df4['Country'].head(5), y=df4['Living'],row=2, col=1)
df5 = df[['Country','Lifestyle']].groupby('Country').agg('mean').sort_values(by='Lifestyle', ascending=False).reset_index().head(5)
fig.add_bar(x=df5['Country'].head(5), y=df5['Lifestyle'],row=2, col=2)
df6 = df[['Country','Income']].groupby('Country').agg('mean').sort_values(by='Income', ascending=False).reset_index().head(5)
fig.add_bar(x=df6['Country'].head(5), y=df6['Income'],row=3, col=1)
df7 = df[['Country','Education']].groupby('Country').agg('mean').sort_values(by='Education', ascending=False).reset_index().head(5)
fig.add_bar(x=df7['Country'].head(5), y=df7['Education'],row=3, col=2)
# Gráficos - Top 5 países com menores custos médios por categoria
from plotly.subplots import make_subplots
fig = make_subplots(rows=3, cols=2, subplot_titles=['Food','Travel','Living','Lifestyle','income','Education'])
fig.update_layout(height=1000, showlegend=False, title='Top 5 paises com custos mais baixos')
df1 = df[['Country','Food']].groupby('Country').agg('mean').sort_values(by='Food').reset_index().head(5)
fig.add_bar(x=df1['Country'].head(5), y=df1['Food'], row=1, col=1)
df3 = df[['Country','Travel']].groupby('Country').agg('mean').sort_values(by='Travel').reset_index().head(5)
fig.add_bar(x=df3['Country'].head(5), y=df3['Travel'],row=1, col=2)
df4 = df[['Country','Living']].groupby('Country').agg('mean').sort_values(by='Living').reset_index().head(5)
fig.add_bar(x=df4['Country'].head(5), y=df4['Living'],row=2, col=1)
df5 = df[['Country','Lifestyle']].groupby('Country').agg('mean').sort_values(by='Lifestyle').reset_index().head(5)
fig.add_bar(x=df5['Country'].head(5), y=df5['Lifestyle'],row=2, col=2)
df6 = df[['Country','Income']].groupby('Country').agg('mean').sort_values(by='Income').reset_index().head(5)
fig.add_bar(x=df6['Country'].head(5), y=df6['Income'],row=3, col=1)
df7 = df[['Country','Education']].groupby('Country').agg('mean').sort_values(by='Education').reset_index().head(5)
fig.add_bar(x=df7['Country'].head(5), y=df7['Education'],row=3, col=2)
# Classificando range dos valores por classes; maior -> menor
top_range = (df.describe().loc['min',:] / df.describe().loc['max',:]).iloc[-6:].sort_values().to_frame().reset_index()
top_range = top_range.rename(columns={'index':'Class', 0:'min / max'})
top_range['min / max'] = top_range['min / max'].apply(lambda x: round(1/x, 2))
top_range
| Class | min / max | |
|---|---|---|
| 0 | Education | 43.82 |
| 1 | Travel | 37.00 |
| 2 | Income | 36.31 |
| 3 | Food | 8.00 |
| 4 | Living | 7.64 |
| 5 | Lifestyle | 4.40 |
# Função para add cores
def color_circles(val):
if val <= df[item].quantile(.25):
return 'forestgreen'
elif val <= df[item].quantile(.50):
return 'goldenrod'
elif val <= df[item].quantile(.75):
return 'Tomato'
else:
return 'darkred'
# plot Análise geoespacial 1
map = folium.Map(location=[df['latitude'].mean(),
df['longitude'].mean()],
tiles="Stamen Terrain",
zoom_start=2)
item = top_range.iloc[0][0]
for i in range(0, len(df)):
Circle(location = [df.iloc[i]['latitude'], df.iloc[i]['longitude']],
radius=120000,
color=color_circles(df.iloc[i][item])).add_to(map)
print('Preço médio da ',item)
map
Preço médio da Education
# plot Análise geoespacial 2
map = folium.Map(location=[df['latitude'].mean(), df['longitude'].mean()],
zoom_start=2)
item = top_range.iloc[1][0]
for i in range(0,len(df)):
Circle(location=[df.iloc[i]['latitude'], df.iloc[i]['longitude']],
radius=90000,
color=color_circles(df.iloc[i][item])).add_to(map)
print('Preço médio da ',item)
map
Preço médio da Travel
# plot Análise geoespacial 3
map = folium.Map(location=[df['latitude'].mean(), df['longitude'].mean()],
tiles='Stamen Watercolor',
zoom_start=2)
item = top_range.iloc[2][0]
for i in range(0,len(df)):
Circle(location=[df.iloc[i]['latitude'], df.iloc[i]['longitude']],
radius=100000,
color= color_circles(df.iloc[i][item])).add_to(map)
print('Valor médio do ',item)
map
Valor médio do Income
# plot Análise geoespacial 4
map = folium.Map(location=[df['latitude'].mean(), df['longitude'].mean()],
tiles='CartoDB positron',
zoom_start=2)
item = top_range.iloc[3][0]
for i in range(0,len(df)):
Circle(location=[df.iloc[i]['latitude'], df.iloc[i]['longitude']],
radius=100000,
color= color_circles(df.iloc[i][item])).add_to(map)
print('Valor médio do ',item)
map
Valor médio do Food
# plot Análise geoespacial 5
map = folium.Map(location=[df['latitude'].mean(), df['longitude'].mean()],
tiles='CartoDB dark_matter',
zoom_start=2)
item = top_range.iloc[4][0]
for i in range(0,len(df)):
Circle(location=[df.iloc[i]['latitude'], df.iloc[i]['longitude']],
radius=100000,
tooltip=df.iloc[i]['Location'],
color= color_circles(df.iloc[i][item])).add_to(map)
print('Valor médio do ',item)
map
Valor médio do Living
from folium import plugins # adicionando um minimapa
# plot Análise geoespacial 6
map = folium.Map(location=[df['latitude'].mean(), df['longitude'].mean()],
tiles='Stamen Terrain',
zoom_start=3.5)
item = top_range.iloc[5][0]
for i in range(0,len(df)):
Circle(location=[df.iloc[i]['latitude'], df.iloc[i]['longitude']],
radius=100000,
tooltip=df.iloc[i]['Location'],
color= color_circles(df.iloc[i][item])).add_to(map)
minimap = plugins.MiniMap(tile_layer='Stamen Watercolor')
# map.add_Legend(title='Legenda', labels=['<= 25%', '<= 50%', '<=75%', '> 75%'], colors=['forestgreen','goldenrod','Tomato','Darkred'])
map.add_child(minimap)
print('Valor médio do ',item)
map
Valor médio do Lifestyle
# Verificando a relação entre as categorias criadas
df_class = df[['Country','Food','Living','Education','Income','Travel','Lifestyle','latitude','longitude']].groupby('Country').agg('mean').reset_index()
df_class.head(3)
| Country | Food | Living | Education | Income | Travel | Lifestyle | latitude | longitude | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | Albania | 2.0 | 4157.0 | 1225.0 | 171.0 | 5.0 | 29.0 | 41.328148 | 19.818444 |
| 1 | Argentina | 2.0 | 4346.0 | 1638.0 | 266.0 | 3.0 | 28.0 | -34.607568 | -58.437089 |
| 2 | Armenia | 2.0 | 4390.0 | 1581.0 | 154.0 | 3.0 | 30.0 | 40.177625 | 44.512617 |
correlation = df_class.iloc[:,1:7].corr()
plt.figure(figsize=(15,6))
sns.heatmap(correlation, cmap='mako_r', annot=True, linewidths=5, linecolor='DarkSlateGray')
plt.title('Correlação entre as classes de custo', size=18)
plt.yticks(rotation='horizontal')
plt.show()
# Comparação das classes entre as cidades de determinado país
df['City'] = df['Location'].str.split(', ')
df['City'] = df['City'].apply(lambda x: x[0])
# Analisando os Estados Unidos
df_US = df[df['Country']=='United States']
colunas = ['Food', 'Travel', 'Living', 'Lifestyle', 'Education','Income']
# plot
plt.figure(figsize=(16,20))
plt.suptitle('Estados Unidos', size=25)
for n, col in enumerate(colunas):
plt.subplot(3,2,n+1)
sns.barplot(y = df_US['City'], x= df_US[col], palette='mako')
plt.title(f'Classe analisada : {col}', size=16)
plt.ylabel(None)
plt.show()
# Analisando os Canada
df_CA = df[df['Country']=='Canada']
colunas = ['Food', 'Travel', 'Living', 'Lifestyle', 'Education','Income']
# plot
plt.figure(figsize=(16,20))
plt.suptitle('Canadá', size=25)
for n, col in enumerate(colunas):
plt.subplot(3,2,n+1)
sns.barplot(y = df_CA['City'], x= df_CA[col], palette='inferno_r')
plt.title(f'Classe analisada : {col}', size=16)
plt.ylabel(None)
plt.show()
# Analisando os India
df_IN = df[df['Country']=='India']
colunas = ['Food', 'Travel', 'Living', 'Lifestyle', 'Education','Income']
# plot
plt.figure(figsize=(16,20))
plt.suptitle('India', size=25)
for n, col in enumerate(colunas):
plt.subplot(3,2,n+1)
sns.barplot(y = df_IN['City'], x= df_IN[col], palette='Oranges_r')
plt.title(f'Classe analisada : {col}', size=16)
plt.ylabel(None)
plt.show()
# Qual a localização que possui a melhor pontuação geral para custo de vida:
df_best_city = df[['Location','Food', 'Travel', 'Living', 'Lifestyle', 'Education','Income','latitude','longitude']].copy()
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0,1))
features_scaled = scaler.fit_transform(df_best_city[['Food', 'Travel', 'Living', 'Lifestyle', 'Education','Income']])
lista=[]
for i in features_scaled:
numero = round(abs(i).mean(),3)
lista.append(numero)
df_best_city['Score'] = lista
# Top 10 cidades com maior custo de vida:
df_best_city[['Location','Score']].sort_values(by='Score', ascending=False).head(10)
| Location | Score | |
|---|---|---|
| 144 | Zurich, Switzerland | 0.790 |
| 78 | New York, NY, United States | 0.670 |
| 100 | San Francisco, CA, United States | 0.660 |
| 39 | Singapore, Singapore | 0.576 |
| 70 | London, United Kingdom | 0.559 |
| 128 | Reykjavik, Iceland | 0.557 |
| 102 | Seattle, WA, United States | 0.540 |
| 50 | Oslo, Norway | 0.534 |
| 47 | Boston, MA, United States | 0.526 |
| 71 | Los Angeles, CA, United States | 0.487 |
def color_Score(data, val):
if val <= data['Score'].quantile(.25):
return 'forestgreen'
elif val <= data['Score'].quantile(.50):
return 'goldenrod'
elif val <= data['Score'].quantile(.75):
return 'Tomato'
else:
return 'darkred'
top_10_mais = df_best_city[['Location','Score','latitude','longitude']].sort_values(by='Score', ascending=False).head(10)
map = folium.Map(location=[top_10_mais['latitude'].mean(), top_10_mais['longitude'].mean()],
tiles='CartoDB positron',
zoom_start=2)
for i in range(0,len(top_10_mais)):
Circle(location=[top_10_mais.iloc[i]['latitude'], top_10_mais.iloc[i]['longitude']],
radius=150000,
tooltip=[top_10_mais.iloc[i]['Location'],top_10_mais.iloc[i]['Score']],
color=color_Score(top_10_mais, top_10_mais.iloc[i]['Score'])).add_to(map)
map
# Top 10 cidades com menor custo de vida:
df_best_city[['Location','Score']].sort_values(by='Score', ascending=True).head(10)
| Location | Score | |
|---|---|---|
| 123 | Kolkata, India | 0.043 |
| 106 | Ahmedabad, India | 0.044 |
| 112 | Chennai, India | 0.056 |
| 63 | Tbilisi, Georgia | 0.058 |
| 118 | Hyderabad, India | 0.060 |
| 81 | Chandigarh, India | 0.064 |
| 130 | Pune, India | 0.069 |
| 82 | Colombo, Sri Lanka | 0.070 |
| 114 | Delhi, India | 0.071 |
| 127 | Mumbai, India | 0.072 |
top_10_menos = df_best_city[['Location','Score','latitude','longitude']].sort_values(by='Score', ascending=True).head(10)
map = folium.Map(location=[top_10_menos['latitude'].mean(), top_10_menos['longitude'].mean()],
tiles='CartoDB positron',
zoom_start=3)
for i in range(0,len(top_10_menos)):
Circle(location=[top_10_menos.iloc[i]['latitude'], top_10_menos.iloc[i]['longitude']],
radius=150000,
tooltip=[top_10_menos.iloc[i]['Location'],top_10_menos.iloc[i]['Score']],
color=color_Score(top_10_menos, top_10_menos.iloc[i]['Score'])).add_to(map)
map